Check all the energy expenses in the households expenses data set and people expenses data sets are the same than the ones reported in the households condensed data set.
df_hsd_condens$energia is equal to the sum of gastoshogar.gasto_tri if clave is equal to G009-G016, R001, R003 plus gastospersona.gasto_tri if clave is equal to G009-G016, R001, R003.
| Code (clave) | Description |
|---|---|
| G009 | Liquefied petroleum gas |
| G010 | Petroleum |
| G011 | Diesel |
| G012 | Carbon |
| G013 | Firewood |
| G014 | Fuel to heat |
| G015 | Candles |
| G016 | Other fuels |
| R001 | Electricity |
| R003 | Natural Gas |
#df_person_energy
# Create household id, this will be the key attribute in the SQL table
df_hsd_energy$id_household <- paste0(df_hsd_energy$folioviv, df_hsd_energy$foliohog)
df_person_energy$id_household <- paste0(df_person_energy$folioviv, df_person_energy$foliohog)
Merging data sets by id_household using energy expenses related data
df_hsd_energy_by_hsd <- df_hsd_energy %>%
group_by(id_household, clave) %>% replace(is.na(.), 0) %>%
summarise(gasto_tri = sum(gasto_tri))
df_psn_energy_by_hsd <- df_person_energy %>%
group_by(id_household, clave) %>% replace(is.na(.), 0) %>%
summarise(gasto_tri = sum(gasto_tri))
df_energy_by_hsd <- merge(df_hsd_energy_by_hsd, df_psn_energy_by_hsd,
by="id_household", all.y = TRUE, all.x = TRUE) %>%
replace(is.na(.), 0)
df_hsd_energy_by_hsd <- as.data.frame(df_hsd_energy_by_hsd)
df_hsd_energy_by_hsd
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G009'] <- 'LPG'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G010'] <- 'Petroleum'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G011'] <- 'Diesel'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G012'] <- 'Carbon'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G013'] <- 'Firewood'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G014'] <- 'Fuel_to_heat'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G015'] <- 'Candles'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='G016'] <- 'Other'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='R001'] <- 'Electricity'
df_hsd_energy_by_hsd$clave[df_hsd_energy_by_hsd$clave=='R003'] <- 'NG'
#df_hsd_energy_by_hsd$clave <-as.factor(df_hsd_energy_by_hsd$clave)
df_hsd_by_hsd_wd <- reshape(df_hsd_energy_by_hsd, v.names = 'gasto_tri',
timevar='clave', idvar="id_household", sep = "_",
direction="wide")
df_hsd_by_hsd_wd
descr(df_hsd_by_hsd_wd, stats = c("mean", "sd", "min", "med", "max", "n.valid", "pct.valid"),
transpose = TRUE)
pal_plot <- c('grey','red', 'rgb(7,40,89)', 'green')
pal_plot <- setNames(pal_plot, c("elect", "ng", "candles", "lpg"))
#df_hsd_by_hsd_wd[is.na(df_hsd_by_hsd_wd)] = 0
pl_energy_exp_box <- plot_ly(type = 'box') %>%
add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_Electricity,
boxpoints = 'outliers',
name = "Electricity",
color = list(color =pal_plot['elect']),
marker = list(color = pal_plot['elect']),
line=list(color = pal_plot['elect']) ) %>%
add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_NG,
boxpoints = 'outliers',
name = "Natural Gas",
color = list(color =pal_plot['ng']),
marker = list(color = pal_plot['ng']),
line=list(color = pal_plot['ng']) ) %>%
add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_Candles,
boxpoints = 'outliers',
name = "Candles",
color = list(color =pal_plot['candles']),
marker = list(color = pal_plot['candles']),
line=list(color = pal_plot['candles']) ) %>%
add_boxplot(y = df_hsd_by_hsd_wd$gasto_tri_LPG,
boxpoints = 'outliers',
name = "LPG",
color = list(color =pal_plot['lpg']),
marker = list(color = pal_plot['lpg']),
line=list(color = pal_plot['lpg']) ) %>%
layout(title = "Boxplots of Expenses by Household per Type of Energy",
yaxis = list(title = "$[MXN]", range = c(0, 5000)))
pl_energy_exp_box
plotly_IMAGE(pl_energy_exp_box, format = "png", out_file = "./figs/box_energy_expenses.png" )
Boxplots of energy expenses per household
Adding factor
subset(df_dwell, select=c('folioviv', 'factor'))
#df_hsd_by_hsd_wd <- merge(df_hsd_by_hsd_wd, df_dwell,
# by="folioviv", all.y = TRUE, all.x = TRUE)